# 云筑集采的招标信息列表页生产者
import random
import time
import pymysql
import redis
import requests
from msg_queue import connect_message_queue
from user_agent_kuaidaili_proxy import get_random_ua
import datetime
import math

# Redis URL handed to connect_message_queue for the tender queue and the retry queue.
redis_test_url = "redis://192.168.1.9:6379/5"
# Proxy mapping passed to requests.post for both http and https traffic.
# NOTE(review): the proxy credentials are embedded in source; consider loading them from configuration.
proxy = {'http': 'http://t10635913038651:09ht71vf@tps152.kdlapi.com:15818/', 'https': 'http://t10635913038651:09ht71vf@tps152.kdlapi.com:15818/'}
# Password given to the redis.Redis client created in YzwTender.__init__.
REDIS_PASSWORD = None


class YzwTender(object):
    def __init__(self):
        """Create the Redis client used for de-duplication and remember the queue URL."""
        # NOTE(review): this client targets localhost while redis_test_url points at
        # another host; confirm the two are meant to differ.
        dedupe_client = redis.Redis(
            host='localhost',
            port=6379,
            db=5,
            password=REDIS_PASSWORD,
        )
        self.redis_conn = dedupe_client
        # URL later passed to connect_message_queue by parse_data and run.
        self.redis_url = redis_test_url

    def distinct_redis(self, data):
        """Record a tender code in the ``yzw_tender_set`` Redis set and report duplicates.

        Args:
            data: Tender code to de-duplicate.

        Returns:
            True if the code was already in the set (the caller should skip it),
            False if it was newly added by this call.
        """
        # SADD returns the number of members it actually inserted, so a single
        # command both tests and records the code. A separate SISMEMBER check
        # would let two concurrent producers both see "missing" and enqueue
        # the same tender twice.
        newly_added = self.redis_conn.sadd('yzw_tender_set', data)
        if newly_added:
            return False
        print('tender_code已存在跳过')
        return True

    def distinct_retry_data_redis(self, url):
        """Record a retry key in the ``retry_yzw_tender_set`` Redis set and report duplicates.

        Args:
            url: Key identifying the failed request (``run`` passes
                ``<area_code>_<page number>``).

        Returns:
            True if the key was already recorded (do not schedule another retry),
            False if it was newly added by this call.
        """
        # One SADD is atomic: its return value says whether the member was new,
        # which avoids the race of checking with SISMEMBER and then adding.
        newly_added = self.redis_conn.sadd('retry_yzw_tender_set', url)
        if newly_added:
            return False
        print('招标文件已存在跳过')
        return True

    def get_tender(self, data, timeout=30):
        """POST a list-page query to the tender endpoint and return the decoded JSON.

        Args:
            data: JSON-serialisable request body (``run`` sends ``pageNum``,
                ``pageSize`` and ``param``).
            timeout: Seconds to wait for the proxy/server before giving up.
                Without a timeout a stalled connection would block the
                producer indefinitely.

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.exceptions.RequestException: On transport failure or timeout.
            ValueError: If the response body is not valid JSON.
        """
        tender_url = 'https://yzmtg.yzw.cn/portal/tender/pageWinners'
        # Rotate the User-Agent on every request.
        new_user_agent = get_random_ua()
        # NOTE(review): the cookie and auth tokens below are hard-coded session
        # values; presumably they expire and must be refreshed by hand.
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Boxid': 'BG6Cqc+NROmZz0mlbssRSPu+MR7hgNdWoeGYqh39Twt1QtPEB46dXwlgA3nNnaNJLthYvMNAd1fCk1KOIMH/kOA==',
            'Content-Length': '63',
            'Content-Type': 'application/json',
            'Cookie': 'yzw-auac-token=7T37OMnWmNbc9GWB9yrP3Z01fetxSzVLIO%2Bcwm54Hd0LMxUaV2NAMhfqaxbpEMNqlFALYM3Db%2FVUaiJ0sjhlqg%3D%3D; LoginRequestKey=3E3D740621312E802EFA2539E916237D66E35554B9378271E0793311458E409A286D5E7C7FDDA90BA3AFDBE59925BB91FE4B85BEE9E2C7F85D400DD8A87F7381A891B14446194F08B107FBCB052808580AFFCD372434D278BC5D3C0EF9CDA9FD8814B21E6ACD8F51A306433F885D2CEA45BE1C5FA934A30D86357727A8C27C32; web.auth.yzw=D2773E432B9AFF6EC8B7B81DA01710DD21B03095F325EE3ABD3B535C69906CE725085D7208A8F76FFACD9BD3D3079E9A8310CDCCB1C19CF1031D5B8CD95056A8939AFD057416591CCA52DECA175E67F59A587EBC8857B332955E3A17F5D496D1B20CB13678F8A552FF601B2A96392FCC56943902B731A532833326619FC5754784907DD5381CE5A81263E63D16E42BF52BA2829D2AC317E516ABA6F22683A30010CF0FF8FF0F83367EA114F58A101E2AEBA051528D248B8E91F6AF7FD4683CF0C1FF4807A3F3AA35D38701546745CB996321D086AA0CE1B5283803249A396628B42892A223A7561A0141341BC6CD042D63B2079811BF2DC5519FB113498E8CF889AA2B300A92324CA8024BAAD18B73D0866C6AF46E9A3341922631876BE524BF23003EEC50C1E399788BB430D15842885C4813FAF461806D931798471E7CD70FA7AE6114A3290D6EFB7DFF9EF4259F76484B36D0E271F146B64065DECCBC4607AF5D29AA6F3B21ADA171E59EC1E8DC9BCD968C65BAFCE0C427506308DA27C8FA887B1FEF7B371DBEADFB1D751BE19897A00F94C43849264974B494A0637369C09E2E0F9DC50C0238969F02FD9A725931A225340019C37E87A51F20060319A0224C521B8739091BBC8B9F89F34948CB6504C15C128F32B8A363C086578817A6B5B6F7F3FE52E600B334A73584C21134F1B3CADE83A3140C41C0D5FF4BAF19B53E0708ED2EBD198FD61124845F7A55BAA8592F6F88942CE90E462293401F24CBFA1BAFAB878F5FB53C4203DC569FD7FBED17A36715F4298B3496BE2BB5EBFC66721DC82FBD592965F8E00AEF9C7997D549C0D8C0AF98129FA43A10A0C47D50A12F83492FB7C4CA4A3833691EAA171B907F55FFEA1528C6F4DC30E95399ED2234828E5D398E56AD17E33B0004DE9969EB5D6C41696FF69DD0521B4466CB85F23ECE52BB26599C95E54DEC8C48305BB8654EF0EA79C1A31D22F101C4CA06F7E7CDBFFE8159C2BD979AE3B8A4FA76ADDB58781AF98B75363AE59F3703E595658D2E4E3AEAF5FFA3CA6D2F3F056D9B24CC; HWWAFSESTIME=1706067575419; HWWAFSESID=33a877f6b84acb1065',
            'Origin': 'https://xy.yzw.cn',
            'Referer': 'https://xy.yzw.cn/sj/win-bid',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': new_user_agent,
            'X-Auth': 'eWskjZXBTON3Q1Ip2rOpK0YsvWWiQw8fzZ1AHTdGKBzHIvFoMxJOhAgnlE5lQtkjmJGnLSEM06hn1ofcL6j76L+wRuGv8CmN/+Aw5do7NkWnELuY22ubnh7Ic4HM+DCdFzjLSdthbGufKY3f3cFVFRZO0hxTp0J1cpuny8tfmmM=',
            'X-Yzw-Auth-Token': '7T37OMnWmNbc9GWB9yrP3Z01fetxSzVLIO%2Bcwm54Hd0LMxUaV2NAMhfqaxbpEMNqlFALYM3Db%2FVUaiJ0sjhlqg%3D%3D'
        }
        response = requests.post(
            tender_url,
            headers=headers,
            json=data,
            proxies=proxy,
            timeout=timeout,
        )
        print('response', response.status_code)
        # Force UTF-8 decoding of the body before parsing it.
        response.encoding = 'utf-8'
        json_data = response.json()
        return json_data

    def get_total_num(self, data):
        """Pull the total count and the record list out of a list-page response.

        Args:
            data: Decoded response dict; its ``data`` entry is read for
                ``totalCount`` and ``records``.

        Returns:
            ``(True, total_count, records)`` when ``data['data']`` is present and
            non-empty, otherwise ``(False, None, None)``. Either of the last two
            values is None when its key is missing.
        """
        payload = data.get('data')
        if payload:
            return True, payload.get('totalCount'), payload.get('records')
        return False, None, None

    def parse_data(self, records):
        """Queue every tender in ``records`` that has not been seen before.

        For each record a message holding its ``source`` and ``tender_code`` is
        built, de-duplicated through ``distinct_redis`` and, when new, put on the
        ``yzw_tender_queue`` message queue.

        Args:
            records: Iterable of record dicts taken from the list response.
                ``None`` or an empty list means there is nothing to do.
        """
        queue_name = 'yzw_tender_queue'
        # Opened on the first new record and reused afterwards, instead of
        # reconnecting for every record; never opened when nothing is new.
        q = None
        # ``records`` is None when the response lacks the key, so fall back to
        # an empty list rather than failing on iteration.
        for record in records or []:
            tender_code = record.get('tenderCode', None)
            data = {
                'source': record.get('source', None),
                'tender_code': tender_code,
            }
            print('data', data)
            # Redis de-duplication: skip tender codes that were already recorded.
            # NOTE(review): the code is marked as seen before the put below, so a
            # failed put would not be retried for this tender.
            if self.distinct_redis(tender_code):
                continue
            # Save to the Redis-backed queue.
            if q is None:
                q = connect_message_queue(queue_name, url=self.redis_url, maxsize=10000, lazy_limit=True)
            q.put(data)



    def run(self, num, page_size, area_code):
        """Fetch one list page for an area and queue its new tenders.

        Args:
            num: Page number sent as ``pageNum``.
            page_size: Number of records requested per page (``pageSize``).
            area_code: Area code sent as ``param.areaCode``.

        Returns:
            ``(True, total_page_num)`` on success, where ``total_page_num`` is
            ``ceil(totalCount / page_size)``. ``(False, None)`` when the request
            or its parsing failed; in that case the request body is put on
            ``yzw_tender_retry_queue`` unless the same area/page was already
            recorded in the retry set.
        """
        data = {
            "pageNum": num,
            "pageSize": page_size,
            "param": {"areaCode": area_code},
        }
        try:
            # Use this instance rather than a module-level global, so the
            # method works for any YzwTender object.
            response = self.get_tender(data)
            res, total_num, records = self.get_total_num(response)
            # A response without data leaves total_num as None; int() then
            # raises and the page is scheduled for retry below.
            total_page_num = math.ceil(int(total_num) / page_size)
        except Exception as e:
            # Report why the page failed instead of discarding the reason.
            print('yzw tender page failed, scheduling retry:', repr(e))
            # format() also copes with a non-string area_code.
            retry_data = '{}_{}'.format(area_code, num)
            if self.distinct_retry_data_redis(retry_data):
                return False, None  # already scheduled once; do not retry again
            retry_url_queue_name = 'yzw_tender_retry_queue'
            q = connect_message_queue(retry_url_queue_name, url=self.redis_url, maxsize=10000, lazy_limit=True)
            q.put(data)
            return False, None  # request body stored for a later retry

        if not res:
            return False, None  # defensive: no data in the response
        self.parse_data(records)
        return True, total_page_num



# yzw = YzwTender()
# page_size = 100
# area_codes = ['110000000000','370000000000', '410000000000', '120000000000','620000000000','630000000000', '640000000000', '650000000000','7785', '7806','7759','4341','420000000000','430000000000','440000000000', '450000000000','460000000000','500000000000','510000000000', '520000000000','530000000000','540000000000','610000000000', '130000000000','140000000000','150000000000','210000000000','220000000000','230000000000','310000000000','320000000000','330000000000','340000000000', '350000000000','360000000000']
# for area_code in area_codes:
#     n = 1
#     ok,total_num = yzw.run(n, page_size, area_code)
#     if not ok:
#         continue
#     for i in range(n+1, total_num+1):
#         o,num = yzw.run(i, page_size, area_code)
#         ran = random.randint(1, 3)
#         time.sleep(ran)
#         print('{}第{}页完成'.format(area_code,i))
#         if not o:
#             continue



# Producer instance used by the call below.
yzw = YzwTender()
# Runs at module load: fetch page 15 with page size 100 for area code 440000000000.
# The second value returned by run is the total page count, not a record count.
ok,total_num = yzw.run(15, 100, '440000000000')









